source("../rscripts/package.R")
## Warning: package 'data.table' was built under R version 3.4.2
## Warning: package 'ggplot2' was built under R version 3.4.2

1 Load data

Yanni <- fread("C:/Users/pixel/Dropbox/Marc-Antoine/data/set2_Yannick_EGFNGFFGFsust_PC12/combined.data.long/new_sust_E_F_N.csv")
library(stringr)
# Create a column with usable names for conditions (XYYY, where E represents the growth factor and YYY its concentration)
gf <- str_extract(Yanni$Stim_All_Ch, "[E,F,N]")
conc <- str_extract(Yanni$Stim_All_Ch, "(([0-9]+\\.[0-9]*)|([0-9]+))")
Yanni$Condition <- paste(gf, conc, sep = "-")
Yanni[, c("TrackObjects_Label_uni","Condition") := list(as.factor(TrackObjects_Label_uni), as.factor(Condition))]
setkey(Yanni, Condition, TrackObjects_Label_uni)
setnames(Yanni, c("Intensity_MeanIntensity_Ratio", "TrackObjects_Label_uni"), c("Ratio","Label"))
setcolorder(Yanni, c("Condition", "Label","RealTime", "Ratio", "Metadata_Series", "Metadata_T", "TrackObjects_Label","Stim_All_Ch", "Stim_All_S"))
del.cols <- names(Yanni)[5:9]
Yanni[, (del.cols) := NULL]
rm(gf, conc, del.cols)

# Trim-axis, normalization fold change per trajectory
Yanni <- Yanni[RealTime <= 200]
Yanni <- myNorm(in.dt = Yanni, in.meas.col = "Ratio", in.rt.min = 0, in.rt.max = 36, in.by.cols = c("Condition", "Label"), in.type = "fold.change")

# Clip outliers and set NAs to reasonable value
Yanni[Ratio.norm >= 1.5, c("Ratio", "Ratio.norm") := list(median(Yanni$Ratio), median(Yanni$Ratio.norm))]
Yanni[is.na(Ratio.norm), c("Ratio", "Ratio.norm") := list(median(Yanni$Ratio, na.rm = T), median(Yanni$Ratio.norm, na.rm = T))]

head(Yanni)
##    Condition   Label RealTime    Ratio Ratio.norm
## 1:    E-0.25 12_0002        0 1183.396  1.0072021
## 2:    E-0.25 12_0002        2 1187.431  1.0106363
## 3:    E-0.25 12_0002        4 1172.491  0.9979203
## 4:    E-0.25 12_0002        6 1176.407  1.0012535
## 5:    E-0.25 12_0002        8 1172.421  0.9978608
## 6:    E-0.25 12_0002       10 1167.917  0.9940272
ggplot(Yanni, aes(y=Ratio.norm, x=RealTime)) + geom_line(aes(group=Label)) + facet_wrap(~Condition) + stat_summary(fun.y=mean, geom="line", colour = "red", size = 1.5) + theme(text = element_text(size = 25))

Only the NGF conditions are different from previous analysis.

2 Visualization PCA

2.1 Data trimming and fucntion definition

For better separation, we use a trimmed version of the dataset here:

Yanni_short <- Yanni[RealTime >= 25 & RealTime <= 100]
Yanni_short[, GF := str_extract(Condition, "[ENF]")]
Yanni_short[, Conc := str_extract(Condition, "[0-9]+(\\.[0-9]+)?")]
Yanni_short[, c("GF", "Conc") := list(as.factor(GF), as.factor(Conc))]

perform.PCA <- function(data, color = "Condition", na.fill = 1.1, value.var = "Ratio.norm", exp.var = c("Condition", "Label"), resp.var = "RealTime", plot.pca = F){
  require(ggbiplot)
  cast <- cast.and.fill(data, exp.var, resp.var, na.fill, value.var)
  pca <- prcomp(cast$mat2, center = T, scale = F)
  if(plot.pca) plot(pca)
  
  g <- ggbiplot(pca, obs.scale = 1, var.scale = 1, 
              groups = unlist(cast$mat[, color]), ellipse = TRUE, 
              circle = TRUE, choices = c(1,2))
  g <- g + scale_color_discrete(name = '')
  g <- g + theme(legend.direction = 'horizontal', 
               legend.position = 'top')
  return(g)
}

cast.and.fill <- function(data, exp.var = c("Condition", "Label"), resp.var = "RealTime", na.fill = 1.2, value.var = "Ratio.norm"){
  formula <- as.formula(paste0(paste(exp.var, collapse = " + "), " ~ ", resp.var))
  mat <- as.matrix(dcast(data, formula, value.var = value.var))
  mat2 <- mat[,-(1:length(exp.var))]
  class(mat2) <- "numeric"
  mat2[which(is.na(mat2))] <- na.fill
  return(list(mat = mat, mat2 = mat2))
}

2.2 All pooled

Elbow method points to consider only the 2st components!

perform.PCA(Yanni_short, plot.pca = T) + ggtitle("All pooled") + theme(text = element_text(size = 25))
## Warning: package 'scales' was built under R version 3.4.2

perform.PCA(Yanni_short, exp.var = c("Condition", "Label", "Conc", "GF"), plot.pca = F, color = "GF") + ggtitle("All pooled - Color GF") + theme(text = element_text(size = 25))

perform.PCA(Yanni_short, exp.var = c("Condition", "Label", "Conc", "GF"), plot.pca = F, color = "Conc") + ggtitle("All pooled - Color Concentration") + theme(text = element_text(size = 25))

PC1 separate based on what comes after the peak (sustained or adaptative), whereas PC2 separates based on peak shape right after pulse.

2.2.1 Extreme trajectories visualization

Let’s look at some representative trajectories:

visualize.extremes.PCA <- function(pca.object, matrix.data, PC, n, tails = "both", interact=T){
  # Visualize the n extreme individuals of the component PC. Tails indicates if the extremes are to be picked from left, right or both tails of PC. Matrix.data is the matrix in wide format that was used to run the pca analysis with stats::prcomp.
  
  ord <- order(pca.object$x[,PC])
  ordd <- rev(ord)
  if(tails=="both"){
    par(mfrow=c(1,2))
    for(i in 1:n){
      mini <-  min(matrix.data[ord[i],], matrix.data[ordd[i],])
      maxi <- max(matrix.data[ord[i],], matrix.data[ordd[i],])
      plot(matrix.data[ord[i],], type = "l", ylim = c(mini, maxi), xlab = "Time", ylab = "Trajectory", main = paste0("Negative Tail - Coord PC", PC, ": ", round(pca.object$x[ord[i], PC], 4)))
      plot(matrix.data[ordd[i],], type = "l", ylim = c(mini, maxi), xlab = "Time", ylab = "Trajectory", main = paste0("Positive Tail - Coord PC", PC, ": ", round(pca.object$x[ordd[i], PC], 4)))
      if(interact) readline(prompt="Press [enter] to see next plots")
    }
  }
  
  else if(tails=="positive"){
    for(i in 1:n){
      plot(matrix.data[ordd[i],], type = "l", xlab = "Time", ylab = "Trajectory", main = paste0("Positive Tail - Coord PC", PC, ": ", round(pca.object$x[ordd[i], PC], 4)))
      if(interact) readline(prompt="Press [enter] to see next plots")
    }
  }
  
  else if(tails=="positive"){
    for(i in 1:n){
      plot(matrix.data[ord[i],], type = "l", xlab = "Time", ylab = "Trajectory", main = paste0("Negative Tail - Coord PC", PC, ": ", round(pca.object$x[ord[i], PC], 4)))
      if(interact) readline(prompt="Press [enter] to see next plots")
    }
  }
}
temp <- cast.and.fill(Yanni_short, c("Condition", "Label"), "RealTime", median(Yanni_short$Ratio.norm), "Ratio.norm")
conditions <- temp$mat[,1]
labels <- temp$mat[,2]
pca <- prcomp(temp$mat2, center = T, scale = F)
visualize.extremes.PCA(pca, temp$mat2, PC=1, 3, interact = F)

PC1 separates constant and/or decreasing trajectories from the sustained ones.

visualize.extremes.PCA(pca, temp$mat2, PC=2, 3, interact = F, tail = "both")

PC2 separates short-term increase and long-term increase, this correlates quite well with growth factor which reacts with different lag.

2.3 EGF only

perform.PCA(Yanni_short[GF=="E"], plot.pca = T) + ggtitle("EGF only") + theme(text = element_text(size = 25))

temp <- cast.and.fill(Yanni_short[GF=="E"], c("Condition", "Label"), "RealTime", median(Yanni_short$Ratio.norm), "Ratio.norm")
conditions <- temp$mat[,1]
labels <- temp$mat[,2]
pca <- prcomp(temp$mat2, center = T, scale = F)
visualize.extremes.PCA(pca, temp$mat2, PC=1, 3, interact = F, tail = "both")

Visual inspection of more extremes show once again separation sustained/peaky as driving behaviour on PC1.

2.4 NGF only

perform.PCA(Yanni_short[GF=="N"], plot.pca = T) + ggtitle("NGF only") + theme(text = element_text(size = 25))

temp <- cast.and.fill(Yanni_short[GF=="N"], c("Condition", "Label"), "RealTime", median(Yanni_short$Ratio.norm), "Ratio.norm")
conditions <- temp$mat[,1]
labels <- temp$mat[,2]
pca <- prcomp(temp$mat2, center = T, scale = F)
visualize.extremes.PCA(pca, temp$mat2, PC=1, 3, interact = F, tail = "both")

Visual inspections of PC1 shows a clear separation of not reaction cells versus reacting cells (sustained response). Not peak response in the extremes.

2.5 FGF only

perform.PCA(Yanni_short[GF=="F"], plot.pca = T) + ggtitle("FGF only") + theme(text = element_text(size = 25))

temp <- cast.and.fill(Yanni_short[GF=="F"], c("Condition", "Label"), "RealTime", median(Yanni_short$Ratio.norm), "Ratio.norm")
conditions <- temp$mat[,1]
labels <- temp$mat[,2]
pca <- prcomp(temp$mat2, center = T, scale = F)
visualize.extremes.PCA(pca, temp$mat2, PC=1, 3, interact = F, tail = "both")

PC1 again peak versus sustained, PC2 separates cells based on amplitude of initial peak.

3 Separability measure along time - AUC

3.1 Compute AUC separability

sep.meas.along.time <- function(data1, data2, time.col, measure.col){
  timev <- sort(unique(data1[, get(time.col)]))
  if(!(identical(sort(unique(data2[, get(time.col)])), timev))) stop("Time vectors must be identical between the two data")
  out <- separability.measures(data1[get(time.col)==timev[1], get(measure.col)], data2[get(time.col)==timev[1], get(measure.col)])
  for(t in timev[2:length(timev)]){
    out <- rbind(out, separability.measures(data1[RealTime==t, get(measure.col)], data2[RealTime==t, get(measure.col)]))
  }
  out <- cbind(timev, out)
  return(out)
}
library(plyr)
# Get all pairs of conditions
conditions_EGF <- combn(as.character(unique(Yanni[, Condition]))[1:4], m = 2)
conditions_FGF <- combn(as.character(unique(Yanni[, Condition]))[5:8], m = 2)
conditions_NGF <- combn(as.character(unique(Yanni[, Condition]))[9:12], m = 2)

conditions <- cbind(conditions_EGF, conditions_NGF, conditions_FGF)
rm(conditions_EGF, conditions_FGF, conditions_NGF)
# Compute separabilities of conditions at each time point
sep.meas.raw <- apply(conditions, 2, function(x) sep.meas.along.time(Yanni[Condition==x[1]],  Yanni[Condition==x[2]], "RealTime", "Ratio" ))
names(sep.meas.raw) <- apply(conditions, 2, function(x) paste(x[1], x[2], sep = ","))

# Go to data table
for(i in 1:length(sep.meas.raw)){
  temp <- unlist(strsplit(names(sep.meas.raw)[i], ","))
  sep.meas.raw[[i]]$Cond1 <- temp[1]
  sep.meas.raw[[i]]$Cond2 <- temp[2]
}
sep.meas.raw <- as.data.table(rbind.fill(sep.meas.raw))
sep.meas.raw[, c("Cond1", "Cond2") := list(factor(Cond1, levels = levels(Yanni$Condition)), factor(Cond2, levels = levels(Yanni$Condition)))]
ggplot(sep.meas.raw, aes(x= timev, y = jm)) + geom_line() + facet_wrap(~Cond1 + Cond2, ncol = 6) + theme(text=element_text(size=25)) + ylab("Jeffries-Matusita [0, sqrt(2)]") + geom_vline(xintercept=40, colour="blue", linetype="longdash") + ggtitle("Separability along time - Raw value")

max.val <- length(unique(sep.meas.raw$timev)) * sqrt(2)
auc.raw <- sep.meas.raw[, .(auc = sum(jm, na.rm = T)/max.val), by = c("Cond1", "Cond2")]
auc.raw
##      Cond1 Cond2        auc
##  1: E-0.25 E-2.5 0.08693007
##  2: E-0.25  E-25 0.13288427
##  3: E-0.25 E-250 0.17862033
##  4:  E-2.5  E-25 0.03835482
##  5:  E-2.5 E-250 0.05603982
##  6:   E-25 E-250 0.01844898
##  7: N-0.25 N-2.5 0.07520494
##  8: N-0.25  N-25 0.18600523
##  9: N-0.25 N-250 0.23432389
## 10:  N-2.5  N-25 0.05978381
## 11:  N-2.5 N-250 0.07738626
## 12:   N-25 N-250 0.03450687
## 13: F-0.25 F-2.5 0.02287193
## 14: F-0.25  F-25 0.08858159
## 15: F-0.25 F-250 0.29692599
## 16:  F-2.5  F-25 0.05954491
## 17:  F-2.5 F-250 0.25576105
## 18:   F-25 F-250 0.09189585

Same values for EGF and NGF as the ones obtained with previous version of the dataset (sanity check OK, minor variations due to the way outliers were slightly differently handled).

3.2 Permutation test

one.permutation.auc <- function(x, y, metric){
  n <- nrow(x)
  m <- nrow(y)
  temp <- rbind(x, y)
  samp.traj <- sample(1:nrow(temp), size = n, replace = FALSE)
  x.resamp <- temp[samp.traj, ]
  y.resamp <- temp[setdiff(1:nrow(temp), samp.traj), ]

  seps <- sapply(1:ncol(x), function(j) separability.measures(x.resamp[, j], y.resamp[, j]))
  return(sum(unlist(seps[metric, ])))
}

permutation.auc <- function(x, y, n, metric = "jm"){
  # x,y: two matrices representing time series, row: trajectory; col: time
  # n: number of permutations
  # metric: one of "jm", "bh", "div", "tdiv", "ks"
  if(ncol(x) != ncol(y)) stop("x and y must have same number of columns")
  return(replicate(n, one.permutation.auc(x,y,metric)))
}

wrap_perm <- function(x, y, measure, n, na.fill){
  a <- as.matrix(dcast(x, Condition + Label ~ RealTime, value.var = measure)[,-c(1,2)])
  b <- as.matrix(dcast(y, Condition + Label ~ RealTime, value.var = measure)[,-c(1,2)])
  a[which(is.na(a))] <- na.fill
  b[which(is.na(b))] <- na.fill
  return(permutation.auc(a, b, n))
}
nperm <- 500
temp <- apply(conditions, 2, function(x) wrap_perm(Yanni[Condition==x[1]], Yanni[Condition==x[2]], "Ratio", nperm, median(Yanni$Ratio)))
temp <- temp/max.val
colnames(temp) <- apply(conditions, 2, paste, collapse=" ; ")
par(mfrow=c(3,6))
for(j in 1:ncol(temp)){
  hist(temp[,j], main = colnames(temp)[j], ylab="", xlab = "", cex.main = 3, cex.axis = 3)
  coln <- str_split(colnames(temp)[j], " ; ", simplify = T)
  abline(v = auc.raw[Cond1==coln[1] & Cond2==coln[2], auc], col = "red", lty = "dashed", lwd = 2)
  auc.raw[Cond1==coln[1] & Cond2==coln[2], p.val.permut := sum(temp[,j] >= auc)/nperm]
}

print(auc.raw)
##      Cond1 Cond2        auc p.val.permut
##  1: E-0.25 E-2.5 0.08693007        0.000
##  2: E-0.25  E-25 0.13288427        0.000
##  3: E-0.25 E-250 0.17862033        0.000
##  4:  E-2.5  E-25 0.03835482        0.004
##  5:  E-2.5 E-250 0.05603982        0.000
##  6:   E-25 E-250 0.01844898        0.126
##  7: N-0.25 N-2.5 0.07520494        0.000
##  8: N-0.25  N-25 0.18600523        0.000
##  9: N-0.25 N-250 0.23432389        0.000
## 10:  N-2.5  N-25 0.05978381        0.010
## 11:  N-2.5 N-250 0.07738626        0.000
## 12:   N-25 N-250 0.03450687        0.646
## 13: F-0.25 F-2.5 0.02287193        0.244
## 14: F-0.25  F-25 0.08858159        0.000
## 15: F-0.25 F-250 0.29692599        0.000
## 16:  F-2.5  F-25 0.05954491        0.000
## 17:  F-2.5 F-250 0.25576105        0.000
## 18:   F-25 F-250 0.09189585        0.000

3.3 Bootstrap per-column

one.bootstrap.auc.percol <- function(x, y, metric){
  samp.col <- sample(1:ncol(x), size = ncol(x), replace = TRUE)
  x.resamp <- x[, samp.col]
  y.resamp <- y[, samp.col]
  seps <- sapply(1:ncol(x), function(j) separability.measures(x.resamp[, j], y.resamp[, j]))
  return(sum(unlist(seps[metric, ])))
}

bootstrap.auc.percol <- function(x, y, B, metric = "jm"){
  # x,y: two matrices representing time series, row: trajectory; col: time
  # B: number of boostraps
  # metric: one of "jm", "bh", "div", "tdiv", "ks"
  if(ncol(x) != ncol(y)) stop("x and y must have same number of columns")
  return(replicate(B, one.bootstrap.auc.percol(x,y,metric)))
}

wrap_bootcol <- function(x, y, measure, n, na.fill){
  a <- as.matrix(dcast(x, Condition + Label ~ RealTime, value.var = measure)[,-c(1,2)])
  b <- as.matrix(dcast(y, Condition + Label ~ RealTime, value.var = measure)[,-c(1,2)])
  a[which(is.na(a))] <- na.fill
  b[which(is.na(b))] <- na.fill
  return(bootstrap.auc.percol(a, b, n))
}
nboot <- 500
bootcol <- apply(conditions, 2, function(x) wrap_bootcol(Yanni[Condition==x[1]], Yanni[Condition==x[2]], "Ratio", nboot, median(Yanni$Ratio)))
bootcol <- bootcol/max.val
colnames(bootcol) <- apply(conditions, 2, paste, collapse=";")
par(mfrow=c(3,6))
for(j in 1:ncol(bootcol)){
  hist(bootcol[,j], main = colnames(bootcol)[j], ylab="", xlab = "", cex.main = 3, cex.axis = 3)
  coln <- str_split(colnames(bootcol)[j], " ; ", simplify = T)
  abline(v = auc.raw[Cond1==coln[1] & Cond2==coln[2], auc], col = "red", lty = "dashed", lwd = 2)
}

3.4 Bootstrap per-row

one.bootstrap.auc.perrow <- function(x, y, metric){
  samp.rowx <- sample(1:nrow(x), size = nrow(x), replace = TRUE)
  samp.rowy <- sample(1:nrow(y), size = nrow(y), replace = TRUE)
  x.resamp <- x[samp.rowx, ]
  y.resamp <- y[samp.rowy, ]
  seps <- sapply(1:ncol(x), function(j) separability.measures(x.resamp[, j], y.resamp[, j]))
  return(sum(unlist(seps[metric, ])))
}

bootstrap.auc.perrow <- function(x, y, B, metric = "jm"){
  # x,y: two matrices representing time series, row: trajectory; col: time
  # B: number of boostraps
  # metric: one of "jm", "bh", "div", "tdiv", "ks"
  if(ncol(x) != ncol(y)) stop("x and y must have same number of columns")
  return(replicate(B, one.bootstrap.auc.perrow(x,y,metric)))
}

wrap_bootrow <- function(x, y, measure, n, na.fill){
  a <- as.matrix(dcast(x, Condition + Label ~ RealTime, value.var = measure)[,-c(1,2)])
  b <- as.matrix(dcast(y, Condition + Label ~ RealTime, value.var = measure)[,-c(1,2)])
  a[which(is.na(a))] <- na.fill
  b[which(is.na(b))] <- na.fill
  return(bootstrap.auc.perrow(a, b, n))
}
bootrow <- apply(conditions, 2, function(x) wrap_bootrow(Yanni[Condition==x[1]], Yanni[Condition==x[2]], "Ratio", nboot, median(Yanni$Ratio)))
bootrow <- bootrow/max.val
colnames(bootrow) <- apply(conditions, 2, paste, collapse=" ; ")
par(mfrow=c(3,6))
for(j in 1:ncol(bootrow)){
  hist(bootrow[,j], main = colnames(bootrow)[j], ylab="", xlab = "", cex.main = 3, cex.axis = 3)
  coln <- str_split(colnames(bootrow)[j], " ; ", simplify = T)
  abline(v = auc.raw[Cond1==coln[1] & Cond2==coln[2], auc], col = "red", lty = "dashed", lwd = 2)
}

3.5 Confidence intervals from bootstraps

# Get CI directly from bootstrap quantiles
ci.bootcol <- apply(bootcol, 2, quantile, probs = c(0.025, 0.975))
ci.bootrow <- apply(bootrow, 2, quantile, probs = c(0.025, 0.975))

# Plotting variables
auc.raw$ci.low.col <- ci.bootcol[1,]
auc.raw$ci.high.col <- ci.bootcol[2,]
auc.raw$ci.low.row <- ci.bootrow[1,]
auc.raw$ci.high.row <- ci.bootrow[2,]
auc.raw[, comb.cond := as.factor(paste(Cond1, Cond2, sep = ";"))]
auc.raw[, GF := as.factor(paste0(str_sub(Cond1,1,1), "GF"))]
print(auc.raw)
##      Cond1 Cond2        auc p.val.permut ci.low.col ci.high.col
##  1: E-0.25 E-2.5 0.08693007        0.000 0.07821636  0.09638171
##  2: E-0.25  E-25 0.13288427        0.000 0.10900625  0.15972622
##  3: E-0.25 E-250 0.17862033        0.000 0.14665817  0.21409319
##  4:  E-2.5  E-25 0.03835482        0.004 0.03072085  0.04614385
##  5:  E-2.5 E-250 0.05603982        0.000 0.03765626  0.07371448
##  6:   E-25 E-250 0.01844898        0.126 0.01544975  0.02171374
##  7: N-0.25 N-2.5 0.07520494        0.000 0.06361726  0.08819693
##  8: N-0.25  N-25 0.18600523        0.000 0.16327823  0.21218236
##  9: N-0.25 N-250 0.23432389        0.000 0.20222088  0.27518145
## 10:  N-2.5  N-25 0.05978381        0.010 0.04786498  0.07580265
## 11:  N-2.5 N-250 0.07738626        0.000 0.05568283  0.10160889
## 12:   N-25 N-250 0.03450687        0.646 0.03061363  0.03780459
## 13: F-0.25 F-2.5 0.02287193        0.244 0.01843512  0.02832020
## 14: F-0.25  F-25 0.08858159        0.000 0.07207104  0.10534756
## 15: F-0.25 F-250 0.29692599        0.000 0.25081409  0.33451269
## 16:  F-2.5  F-25 0.05954491        0.000 0.04927623  0.06982827
## 17:  F-2.5 F-250 0.25576105        0.000 0.22452249  0.28754597
## 18:   F-25 F-250 0.09189585        0.000 0.08072031  0.10046864
##      ci.low.row ci.high.row    comb.cond  GF
##  1: 0.044797864  0.16300563 E-0.25;E-2.5 EGF
##  2: 0.081433578  0.21956663  E-0.25;E-25 EGF
##  3: 0.126116221  0.26081137 E-0.25;E-250 EGF
##  4: 0.029572430  0.07577877   E-2.5;E-25 EGF
##  5: 0.041493263  0.09646108  E-2.5;E-250 EGF
##  6: 0.011994838  0.05705320   E-25;E-250 EGF
##  7: 0.051927738  0.12109404 N-0.25;N-2.5 NGF
##  8: 0.146560266  0.29189411  N-0.25;N-25 NGF
##  9: 0.177139421  0.33012985 N-0.25;N-250 NGF
## 10: 0.046040140  0.13908065   N-2.5;N-25 NGF
## 11: 0.053249228  0.12540128  N-2.5;N-250 NGF
## 12: 0.007923069  0.15249392   N-25;N-250 NGF
## 13: 0.017818118  0.08157346 F-0.25;F-2.5 FGF
## 14: 0.059515002  0.17006970  F-0.25;F-25 FGF
## 15: 0.213504987  0.42617843 F-0.25;F-250 FGF
## 16: 0.033000282  0.13186786   F-2.5;F-25 FGF
## 17: 0.176389424  0.38535196  F-2.5;F-250 FGF
## 18: 0.044771740  0.18583738   F-25;F-250 FGF
ggplot(auc.raw, aes(x=Cond1, y=Cond2)) + geom_raster(aes(fill=auc)) + facet_wrap(~GF, scales = "free")

ggplot(auc.raw, aes(x=comb.cond, y=auc, group = 1)) + 
    geom_errorbar(aes(ymin=ci.low.col, ymax=ci.high.col)) +
    geom_point(size = 5, shape=21, fill = "white" ) +
    facet_grid(.~GF, scales = "free") + 
    ggtitle("CI based on per column bootstraps") +
    theme(strip.text.x = element_text(size=30), axis.text = element_text(size=13), title = element_text(size=30))

ggplot(auc.raw, aes(x=comb.cond, y=auc, group = 1)) + 
    geom_errorbar(aes(ymin=ci.low.row, ymax=ci.high.row)) +
    geom_point(size = 5, shape=21, fill = "white" ) +
    facet_grid(.~GF, scales = "free") + 
    ggtitle("CI based on per row bootstraps") +
    theme(strip.text.x = element_text(size=30), axis.text = element_text(size=13), title = element_text(size=30))

3.6 Hierarchical clustering using AUC as distance metric

# Reshape to "diagonal"
dist.raw <- dcast(data=auc.raw[,1:3], formula = Cond1 ~ Cond2, value.var = "auc")
row_nams <- c(dist.raw[,1])$Cond1
dist.raw <- dist.raw[,-1]
dist.raw
##         E-2.5       E-25      E-250      F-2.5       F-25      F-250
## 1: 0.08693007 0.13288427 0.17862033         NA         NA         NA
## 2:         NA 0.03835482 0.05603982         NA         NA         NA
## 3:         NA         NA 0.01844898         NA         NA         NA
## 4:         NA         NA         NA 0.02287193 0.08858159 0.29692599
## 5:         NA         NA         NA         NA 0.05954491 0.25576105
## 6:         NA         NA         NA         NA         NA 0.09189585
## 7:         NA         NA         NA         NA         NA         NA
## 8:         NA         NA         NA         NA         NA         NA
## 9:         NA         NA         NA         NA         NA         NA
##         N-2.5       N-25      N-250
## 1:         NA         NA         NA
## 2:         NA         NA         NA
## 3:         NA         NA         NA
## 4:         NA         NA         NA
## 5:         NA         NA         NA
## 6:         NA         NA         NA
## 7: 0.07520494 0.18600523 0.23432389
## 8:         NA 0.05978381 0.07738626
## 9:         NA         NA 0.03450687
# Isolate EGF, go to square symmetric matrix
temp <- as.matrix(dist.raw[1:3,1:3])
rownames(temp) <- row_nams[1:3]
indices <- unique(c(rownames(temp), colnames(temp)))
mat.EGF <- matrix(0, nrow=length(indices), ncol=length(indices),dimnames = list(indices,indices))
mat.EGF[upper.tri(mat.EGF, diag = F)] <- temp[upper.tri(temp, diag=T)]
mat.EGF <- mat.EGF + t(mat.EGF)
# Hierarchical clustering
plot(as.dendrogram(hclust(as.dist(mat.EGF))), ylim = c(0,0.3), main = "AUC as distance matrix - EGF")

# Isolate FGF, go to square symmetric matrix
temp <- as.matrix(dist.raw[4:6,4:6])
rownames(temp) <- row_nams[4:6]
indices <- unique(c(rownames(temp), colnames(temp)))
mat.FGF <- matrix(0, nrow=length(indices), ncol=length(indices),dimnames = list(indices,indices))
mat.FGF[upper.tri(mat.FGF, diag = F)] <- temp[upper.tri(temp, diag=T)]
mat.FGF <- mat.FGF + t(mat.FGF)
# Hierarchical clustering
plot(as.dendrogram(hclust(as.dist(mat.FGF))), ylim = c(0,0.3), main = "AUC as distance matrix - FGF")

# Isolate NGF, go to square symmetric matrix
temp <- as.matrix(dist.raw[7:9,7:9])
rownames(temp) <- row_nams[7:9]
indices <- unique(c(rownames(temp), colnames(temp)))
mat.NGF <- matrix(0, nrow=length(indices), ncol=length(indices),dimnames = list(indices,indices))
mat.NGF[upper.tri(mat.NGF, diag = F)] <- temp[upper.tri(temp, diag=T)]
mat.NGF <- mat.NGF + t(mat.NGF)
# Hierarchical clustering
plot(as.dendrogram(hclust(as.dist(mat.NGF))), ylim = c(0,0.3), main = "AUC as distance matrix - NGF")